Here, we load the surprisal data for the modified stimuli from Bradford et al. (2020), as calculated by each of the models (BERT-large and GPT-3).
Original stimuli were obtained from OSF: https://osf.io/pw7h6/.
# setwd("/Users/seantrott/Dropbox/UCSD/Research/NLMs/nlm-fb/src/analysis")
# Read the BERT-large surprisal table. The path is relative, so the working
# directory presumably has to be the analysis folder named in the setwd above.
df_fb_bl <- read_csv(
  "../../data/processed/bradford-fb-modified_bert-large_surprisals.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## `Unnamed: 0` = col_double(),
## Item = col_double(),
## Condition = col_double(),
## `Condition code` = col_character(),
## Scenario = col_character(),
## Modified = col_character(),
## First_mention = col_character(),
## Recent_mention = col_character(),
## prior_mentions_start = col_double(),
## prior_mentions_end = col_double(),
## num_sentences = col_double(),
## probability = col_double(),
## belief = col_character(),
## consistency = col_character(),
## final_word = col_character(),
## masked_passages = col_character()
## )
# Row count of the BERT-large table, as a quick check on the import.
nrow(df_fb_bl)
## [1] 2400
# Add the analysis columns: `condition` copies `Condition code`, `log_prob`
# is the base-2 log of `probability`, and `surprisal` is its negation.
df_fb_bl <- df_fb_bl %>%
  mutate(
    condition = `Condition code`,
    log_prob = log2(probability),
    surprisal = -log_prob
  )
# Cell counts: condition code crossed with First_mention.
table(df_fb_bl$condition, df_fb_bl$First_mention)
##
## End Start
## FB-C 120 480
## FB-IC 120 480
## TB-C 120 480
## TB-IC 120 480
# Check that each condition code maps onto exactly one consistency level.
table(df_fb_bl$condition, df_fb_bl$consistency)
##
## C IC
## FB-C 600 0
## FB-IC 0 600
## TB-C 600 0
## TB-IC 0 600
# Check that each condition code maps onto exactly one belief level.
table(df_fb_bl$condition, df_fb_bl$belief)
##
## FB TB
## FB-C 600 0
## FB-IC 600 0
## TB-C 0 600
## TB-IC 0 600
# Cell counts for the consistency x belief design.
table(df_fb_bl$consistency, df_fb_bl$belief)
##
## FB TB
## C 600 600
## IC 600 600
# Covariate: ratio of prior_mentions_start to prior_mentions_end.
# NOTE(review): this is Inf/NaN wherever prior_mentions_end is 0 -- confirm
# that column is always positive.
df_fb_bl <- df_fb_bl %>%
  mutate(
    mentions_ratio = prior_mentions_start / prior_mentions_end
  )
# Read the GPT-3 surprisal table (path is relative to the working directory).
df_fb_gpt3 <- read_csv(
  "../../data/processed/bradford-fb-modified_gpt3_surprisals.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## `Unnamed: 0` = col_double(),
## Item = col_double(),
## Condition = col_double(),
## `Condition code` = col_character(),
## Scenario = col_character(),
## Modified = col_character(),
## First_mention = col_character(),
## Recent_mention = col_character(),
## prior_mentions_start = col_double(),
## prior_mentions_end = col_double(),
## num_sentences = col_double(),
## log_prob = col_double(),
## belief = col_character(),
## consistency = col_character()
## )
# Row count of the GPT-3 table, as a quick check on the import.
nrow(df_fb_gpt3)
## [1] 2400
# Add the analysis columns: `condition` copies `Condition code`, and
# `surprisal` is the negated `log_prob` column read from the file.
# NOTE(review): the log base of `log_prob` is not visible here, whereas the
# BERT table is converted with log2 -- confirm the units before comparing
# surprisal magnitudes across the two models.
df_fb_gpt3 <- df_fb_gpt3 %>%
  mutate(
    condition = `Condition code`,
    surprisal = -log_prob
  )
# Number of rows per condition code.
table(df_fb_gpt3$condition)
##
## FB-C FB-IC TB-C TB-IC
## 600 600 600 600
# Check that each condition code maps onto exactly one consistency level.
table(df_fb_gpt3$condition, df_fb_gpt3$consistency)
##
## C IC
## FB-C 600 0
## FB-IC 0 600
## TB-C 600 0
## TB-IC 0 600
# Check that each condition code maps onto exactly one belief level.
table(df_fb_gpt3$condition, df_fb_gpt3$belief)
##
## FB TB
## FB-C 600 0
## FB-IC 600 0
## TB-C 0 600
## TB-IC 0 600
# Cell counts for the consistency x belief design.
table(df_fb_gpt3$consistency, df_fb_gpt3$belief)
##
## FB TB
## C 600 600
## IC 600 600
# Covariate: ratio of prior_mentions_start to prior_mentions_end.
# NOTE(review): this is Inf/NaN wherever prior_mentions_end is 0 -- confirm
# that column is always positive.
df_fb_gpt3 <- df_fb_gpt3 %>%
  mutate(
    mentions_ratio = prior_mentions_start / prior_mentions_end
  )
## Density version
# Ridgeline densities of BERT surprisal for rows with Modified == "Yes":
# one ridge per belief level, filled by consistency, faceted by
# First_mention and Recent_mention.
df_fb_bl %>%
  filter(Modified == "Yes") %>%
  ggplot(aes(x = surprisal,
             y = belief,
             fill = consistency)) +
  # after_stat(density) replaces the deprecated `..density..` notation; the
  # ridge height is still the density computed by stat = "density".
  geom_density_ridges2(aes(height = after_stat(density)),
                       color = gray(0.25),
                       alpha = 0.5,
                       scale = 0.85,
                       size = .9,
                       stat = "density") +
  labs(x = "Surprisal of target word",
       y = "Belief condition") +
  # Dotted reference line at zero surprisal.
  geom_vline(xintercept = 0, linetype = "dotted") +
  theme_bw() +
  facet_wrap(~First_mention + Recent_mention,
             labeller = label_both)
## Strip chart version
# Jittered raw BERT surprisals by belief, coloured by consistency, overlaid
# with each group's mean and a +/- 2 standard-error range. Facets are
# Modified x First_mention.
df_fb_bl %>%
  ggplot(aes(x = belief, y = surprisal, color = consistency)) +
  geom_jitter(alpha = .1) +
  stat_summary(
    fun = mean,
    fun.min = function(v) mean(v) - 2 * sd(v) / sqrt(length(v)),
    fun.max = function(v) mean(v) + 2 * sd(v) / sqrt(length(v)),
    geom = "pointrange",
    position = position_dodge(width = 0.95)
  ) +
  labs(x = "Condition", y = "Surprisal (BERT)") +
  theme_bw() +
  facet_wrap(~ Modified + First_mention)
# BERT surprisal by condition code, coloured by First_mention: jittered raw
# values plus each group's mean with a +/- 2 standard-error range.
df_fb_bl %>%
  ggplot(aes(x = condition, y = surprisal, color = First_mention)) +
  geom_jitter(alpha = .1) +
  stat_summary(
    fun = mean,
    fun.min = function(v) mean(v) - 2 * sd(v) / sqrt(length(v)),
    fun.max = function(v) mean(v) + 2 * sd(v) / sqrt(length(v)),
    geom = "pointrange",
    position = position_dodge(width = 0.95)
  ) +
  labs(x = "Condition", y = "Surprisal (BERT)") +
  theme_bw()
## Density version
# Ridgeline densities of GPT-3 surprisal for rows with Modified == "Yes":
# one ridge per belief level, filled by consistency, faceted by
# First_mention and Recent_mention.
df_fb_gpt3 %>%
  filter(Modified == "Yes") %>%
  ggplot(aes(x = surprisal,
             y = belief,
             fill = consistency)) +
  # after_stat(density) replaces the deprecated `..density..` notation; the
  # ridge height is still the density computed by stat = "density".
  geom_density_ridges2(aes(height = after_stat(density)),
                       color = gray(0.25),
                       alpha = 0.5,
                       scale = 0.85,
                       size = .9,
                       stat = "density") +
  labs(x = "Surprisal of target word",
       y = "Belief condition") +
  # Dotted reference line at zero surprisal.
  geom_vline(xintercept = 0, linetype = "dotted") +
  theme_bw() +
  facet_wrap(~First_mention +
               Recent_mention,
             labeller = label_both)
## Strip chart version
# Jittered raw GPT-3 surprisals by belief, coloured by consistency, overlaid
# with each group's mean and a +/- 2 standard-error range. One facet per
# First_mention x Recent_mention x mentions_ratio combination, in two columns.
df_fb_gpt3 %>%
  ggplot(aes(x = belief, y = surprisal, color = consistency)) +
  geom_jitter(alpha = .1) +
  stat_summary(
    fun = mean,
    fun.min = function(v) mean(v) - 2 * sd(v) / sqrt(length(v)),
    fun.max = function(v) mean(v) + 2 * sd(v) / sqrt(length(v)),
    geom = "pointrange",
    position = position_dodge(width = 0.95)
  ) +
  labs(x = "Condition", y = "Surprisal (GPT-3)") +
  theme_bw() +
  facet_wrap(
    ~ First_mention + Recent_mention + mentions_ratio,
    labeller = label_both,
    ncol = 2
  )
# Original condition
# GPT-3 surprisal by condition code (x axis and colour both map to
# condition): jittered raw values plus each group's mean with a +/- 2
# standard-error range, and a dotted reference line at zero.
df_fb_gpt3 %>%
  ggplot(aes(x = condition, y = surprisal, color = condition)) +
  geom_jitter(alpha = .1) +
  stat_summary(
    fun = mean,
    fun.min = function(v) mean(v) - 2 * sd(v) / sqrt(length(v)),
    fun.max = function(v) mean(v) + 2 * sd(v) / sqrt(length(v)),
    geom = "pointrange",
    position = position_dodge(width = 0.95)
  ) +
  labs(x = "Condition", y = "Surprisal of target word") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  theme_bw()
## By first mention
# GPT-3 surprisal by condition code, coloured by First_mention and faceted
# by Recent_mention: jittered raw values plus each group's mean with a
# +/- 2 standard-error range, and a dotted reference line at zero.
df_fb_gpt3 %>%
  ggplot(aes(x = condition, y = surprisal, color = First_mention)) +
  geom_jitter(alpha = .1) +
  stat_summary(
    fun = mean,
    fun.min = function(v) mean(v) - 2 * sd(v) / sqrt(length(v)),
    fun.max = function(v) mean(v) + 2 * sd(v) / sqrt(length(v)),
    geom = "pointrange",
    position = position_dodge(width = 0.95)
  ) +
  labs(x = "Condition", y = "Surprisal (GPT-3)") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  theme_bw() +
  facet_wrap(~ Recent_mention, labeller = label_both)
# Full mixed model for the BERT surprisals: belief x consistency interaction
# plus the three mention covariates, with by-Item random intercepts and
# random slopes for belief, consistency, and their interaction. Fit by
# maximum likelihood (REML = FALSE) with the bobyqa optimizer.
# NOTE(review): lme4 reports this fit as singular; the random-effects
# structure may be richer than the data support.
model_full <- lmer(
  surprisal ~ belief * consistency + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_bl,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
## boundary (singular) fit: see ?isSingular
# Additive BERT model: same as the full model but with belief + consistency
# as main effects only (no fixed-effect interaction); the random-effects
# structure is unchanged.
# NOTE(review): lme4 reports this fit as singular.
model_just_fe <- lmer(
  surprisal ~ belief + consistency + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_bl,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
## boundary (singular) fit: see ?isSingular
# Reduced BERT model without the fixed effect of belief (consistency and the
# mention covariates are kept); the random-effects structure is unchanged.
# NOTE(review): lme4 reports this fit as singular.
model_no_belief <- lmer(
  surprisal ~ consistency + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_bl,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
## boundary (singular) fit: see ?isSingular
# Reduced BERT model without the fixed effect of consistency (belief and the
# mention covariates are kept); the random-effects structure is unchanged.
# NOTE(review): lme4 reports this fit as singular.
model_no_con <- lmer(
  surprisal ~ belief + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_bl,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
## boundary (singular) fit: see ?isSingular
# Coefficient table and random-effects summary for the full BERT model.
summary(model_full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: surprisal ~ belief * consistency + First_mention + Recent_mention +
## mentions_ratio + (1 + belief * consistency | Item)
## Data: df_fb_bl
## Control: lmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 11859.6 11963.7 -5911.8 11823.6 2382
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2430 -0.5066 -0.0373 0.5032 4.8401
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Item (Intercept) 21.223 4.607
## beliefTB 58.554 7.652 -0.78
## consistencyIC 61.153 7.820 -0.77 1.00
## beliefTB:consistencyIC 232.682 15.254 0.77 -1.00 -1.00
## Residual 5.563 2.359
## Number of obs: 2400, groups: Item, 120
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.87157 0.44485 135.67961 10.951 <2e-16 ***
## beliefTB 1.49304 0.71168 120.06469 2.098 0.0380 *
## consistencyIC 1.16954 0.72674 120.00084 1.609 0.1102
## First_mentionStart 0.34121 0.14095 2040.00061 2.421 0.0156 *
## Recent_mentionStart 0.24889 0.14950 2040.00047 1.665 0.0961 .
## mentions_ratio -0.22114 0.09967 2040.00039 -2.219 0.0266 *
## beliefTB:consistencyIC -3.35726 1.40574 120.00068 -2.388 0.0185 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) belfTB cnssIC Frst_S Rcnt_S mntns_
## beliefTB -0.750
## consstncyIC -0.745 0.982
## Frst_mntnSt -0.132 0.000 0.000
## Rcnt_mntnSt 0.062 0.000 0.000 0.079
## mentions_rt -0.112 0.000 0.000 -0.471 -0.556
## blfTB:cnsIC 0.736 -0.991 -0.991 0.000 0.000 0.000
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Likelihood-ratio test of the belief x consistency interaction
# (full model vs. main-effects-only model).
anova(model_full, model_just_fe)
## Data: df_fb_bl
## Models:
## model_just_fe: surprisal ~ belief + consistency + First_mention + Recent_mention +
## model_just_fe: mentions_ratio + (1 + belief * consistency | Item)
## model_full: surprisal ~ belief * consistency + First_mention + Recent_mention +
## model_full: mentions_ratio + (1 + belief * consistency | Item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_just_fe 17 11863 11962 -5914.6 11829
## model_full 18 11860 11964 -5911.8 11824 5.5723 1 0.01825 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test of the main effect of belief
# (main-effects model vs. the model without belief).
anova(model_just_fe, model_no_belief)
## Data: df_fb_bl
## Models:
## model_no_belief: surprisal ~ consistency + First_mention + Recent_mention + mentions_ratio +
## model_no_belief: (1 + belief * consistency | Item)
## model_just_fe: surprisal ~ belief + consistency + First_mention + Recent_mention +
## model_just_fe: mentions_ratio + (1 + belief * consistency | Item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_no_belief 16 11865 11957 -5916.4 11833
## model_just_fe 17 11863 11962 -5914.6 11829 3.728 1 0.05351 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test of the main effect of consistency
# (main-effects model vs. the model without consistency).
anova(model_just_fe, model_no_con)
## Data: df_fb_bl
## Models:
## model_no_con: surprisal ~ belief + First_mention + Recent_mention + mentions_ratio +
## model_no_con: (1 + belief * consistency | Item)
## model_just_fe: surprisal ~ belief + consistency + First_mention + Recent_mention +
## model_just_fe: mentions_ratio + (1 + belief * consistency | Item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_no_con 16 11888 11980 -5928.0 11856
## model_just_fe 17 11863 11962 -5914.6 11829 26.783 1 2.276e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient plot for the full BERT model: one point per fixed-effect term
# with an error bar of +/- 2 standard errors and a dotted line at zero.
df_tidy_results <- broom.mixed::tidy(model_full)
df_tidy_results %>%
  filter(effect == "fixed") %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point() +
  # Flip so that the term names read horizontally along the vertical axis.
  coord_flip() +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_errorbar(
    aes(
      ymin = estimate - 2 * std.error,
      ymax = estimate + 2 * std.error
    ),
    width = .2,
    position = position_dodge(.9)
  ) +
  theme_minimal()
# Full mixed model for the GPT-3 surprisals, with the same formula as the
# BERT analysis. This reuses the name `model_full`, so the BERT fit is
# replaced from here on.
# NOTE(review): lme4 reports a singular fit and a convergence failure
# (negative eigenvalue) for this model; treat its estimates with caution.
model_full <- lmer(
  surprisal ~ belief * consistency + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_gpt3,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
## boundary (singular) fit: see ?isSingular
## Warning: Model failed to converge with 1 negative eigenvalue: -2.1e+01
# Additive GPT-3 model: belief + consistency as main effects only, with the
# same random-effects structure. Replaces the BERT `model_just_fe`.
# NOTE(review): lme4 reports a singular fit and a convergence failure
# (negative eigenvalue) for this model.
model_just_fe <- lmer(
  surprisal ~ belief + consistency + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_gpt3,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
## boundary (singular) fit: see ?isSingular
## Warning: Model failed to converge with 1 negative eigenvalue: -5.4e+00
# Reduced GPT-3 model without the fixed effect of belief; the random-effects
# structure is unchanged. Replaces the BERT `model_no_belief`.
model_no_belief <- lmer(
  surprisal ~ consistency + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_gpt3,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
# Reduced GPT-3 model without the fixed effect of consistency; the
# random-effects structure is unchanged. Replaces the BERT `model_no_con`.
model_no_con <- lmer(
  surprisal ~ belief + First_mention + Recent_mention +
    mentions_ratio + (1 + belief * consistency | Item),
  data = df_fb_gpt3,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
# Coefficient table and random-effects summary for the full GPT-3 model
# (model_full was refit on df_fb_gpt3 above).
summary(model_full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: surprisal ~ belief * consistency + First_mention + Recent_mention +
## mentions_ratio + (1 + belief * consistency | Item)
## Data: df_fb_gpt3
## Control: lmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 4771.9 4875.9 -2367.9 4735.9 2382
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.4440 -0.4745 0.0057 0.4309 8.7051
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Item (Intercept) 2.2363 1.4954
## beliefTB 3.8264 1.9561 -0.76
## consistencyIC 4.0018 2.0004 -0.74 1.00
## beliefTB:consistencyIC 15.3332 3.9158 0.75 -1.00 -1.00
## Residual 0.2705 0.5201
## Number of obs: 2400, groups: Item, 120
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.27642 0.14021 127.28760 16.236 < 2e-16 ***
## beliefTB -0.80112 0.18108 119.99933 -4.424 2.14e-05 ***
## consistencyIC -0.68670 0.18507 120.00804 -3.711 0.000315 ***
## First_mentionStart 0.20485 0.03108 2039.68255 6.591 5.55e-11 ***
## Recent_mentionStart -0.09216 0.03297 2039.68251 -2.796 0.005230 **
## mentions_ratio -0.15575 0.02198 2039.68254 -7.087 1.88e-12 ***
## beliefTB:consistencyIC 1.43933 0.35997 119.99971 3.998 0.000111 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) belfTB cnssIC Frst_S Rcnt_S mntns_
## beliefTB -0.750
## consstncyIC -0.733 0.986
## Frst_mntnSt -0.092 0.000 0.000
## Rcnt_mntnSt 0.044 0.000 0.000 0.079
## mentions_rt -0.078 0.000 0.000 -0.471 -0.556
## blfTB:cnsIC 0.735 -0.992 -0.992 0.000 0.000 0.000
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Likelihood-ratio test of the belief x consistency interaction for the
# GPT-3 fits (full model vs. main-effects-only model).
anova(model_full, model_just_fe)
## Data: df_fb_gpt3
## Models:
## model_just_fe: surprisal ~ belief + consistency + First_mention + Recent_mention +
## model_just_fe: mentions_ratio + (1 + belief * consistency | Item)
## model_full: surprisal ~ belief * consistency + First_mention + Recent_mention +
## model_full: mentions_ratio + (1 + belief * consistency | Item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_just_fe 17 4784.1 4882.5 -2375.1 4750.1
## model_full 18 4771.9 4875.9 -2367.9 4735.9 14.293 1 0.0001564 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test of the main effect of belief for the GPT-3 fits
# (main-effects model vs. the model without belief).
anova(model_just_fe, model_no_belief)
## Data: df_fb_gpt3
## Models:
## model_no_belief: surprisal ~ consistency + First_mention + Recent_mention + mentions_ratio +
## model_no_belief: (1 + belief * consistency | Item)
## model_just_fe: surprisal ~ belief + consistency + First_mention + Recent_mention +
## model_just_fe: mentions_ratio + (1 + belief * consistency | Item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_no_belief 16 4791.2 4883.8 -2379.6 4759.2
## model_just_fe 17 4784.1 4882.5 -2375.1 4750.1 9.0841 1 0.002578 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test of the main effect of consistency for the GPT-3 fits
# (main-effects model vs. the model without consistency).
anova(model_just_fe, model_no_con)
## Data: df_fb_gpt3
## Models:
## model_no_con: surprisal ~ belief + First_mention + Recent_mention + mentions_ratio +
## model_no_con: (1 + belief * consistency | Item)
## model_just_fe: surprisal ~ belief + consistency + First_mention + Recent_mention +
## model_just_fe: mentions_ratio + (1 + belief * consistency | Item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_no_con 16 4785.2 4877.7 -2376.6 4753.2
## model_just_fe 17 4784.1 4882.5 -2375.1 4750.1 3.0206 1 0.08221 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient plot for the full GPT-3 model: one point per fixed-effect term
# with an error bar of +/- 2 standard errors and a dotted line at zero.
df_tidy_results <- broom.mixed::tidy(model_full)
df_tidy_results %>%
  filter(effect == "fixed") %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point() +
  # Flip so that the term names read horizontally along the vertical axis.
  coord_flip() +
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_errorbar(
    aes(
      ymin = estimate - 2 * std.error,
      ymax = estimate + 2 * std.error
    ),
    width = .2,
    position = position_dodge(.9)
  ) +
  theme_minimal()